cd "directory"

/*solar applications*/
foreach var in PGE SCE SDGE {
import delimited "solar_`var'.csv", clear
keep if customersector == "Residential"
replace technologytype = "Solar PV" if technologytype == "Solar"
replace technologytype = "Solar PV" if technologytype == "SOLAR PV"
keep if technologytype == "Solar PV"
keep if systemsizeac > 0
gen EV = electricvehicle == "Yes"
ren servicezip zip
keep applicationid precedingid supercedingid applicationstatus utility systemsizedc systemsizeac nemtariff EV appreceiveddate appcompletedate appapproveddate decommissioneddate zip
save "solar_`var'.dta", replace
}
use "solar_PGE.dta", clear
foreach var in SCE SDGE {
append using "solar_`var'.dta", force
}
save "solar.dta", replace

/*end*/
use "solar.dta", clear
keep if applicationstatus == "Decommissioned"
gen date_end = date(decommissioneddate, "YMD")
format date_end %td
gen year = year(date_end)
egen count_end = group(applicationid)
ren systemsizeac area_end
collapse (count) count_end (sum) area_end, by(year zip)
save "solar_end.dta", replace

/*new*/
use "solar.dta", clear
keep if precedingid == ""
gen date_new = date(appreceiveddate, "YMD")
format date_new %td
gen year = year(date_new)
egen count_new = group(applicationid)
ren systemsizeac area_new
collapse (count) count_new (sum) area_new, by(year zip)
save "solar_new.dta", replace

/*add*/
use "solar.dta", clear
keep if precedingid != ""
gen date_add = date(appreceiveddate, "YMD")
format date_add %td
gen year = year(date_add)
egen count_add = group(applicationid)
ren systemsizeac area_add
collapse (count) count_add (sum) area_add, by(year zip)
save "solar_add.dta", replace

/*zipcode zcta crosswalk*/
import excel "zip_to_zcta.xlsx", sheet("Zip_to_ZCTA_2016") firstrow case(lower) clear
destring zip_code, gen(zip)
destring zcta, replace
keep zip zcta
save "zip_zcta_crosswalk.dta", replace

/*zcta county crosswalk*/
import delimited "zcta_county.txt", clear 
keep zcta5 county
ren zcta5 zcta
ren county fips
collapse (firstnm) fips, by(zcta)
save "zcta_county_crosswalk.dta", replace

/*solar merge*/
use "solar_new.dta", clear

merge 1:1 zip year using "solar_end.dta"
drop _merge
merge 1:1 zip year using "solar_add.dta"
drop _merge

foreach var in count_new area_new count_end area_end count_add area_add {
replace `var' = 0 if `var' == .
}
merge m:1 zip using "zip_zcta_crosswalk.dta"
drop if _merge != 3
drop _merge

collapse (sum) count_new area_new count_end area_end count_add area_add, by(year zcta)

gen count = count_new - count_end + count_add
gen area = area_new - area_end + area_add

sort zcta year
by zcta: gen cum_count = sum(count)
by zcta: gen cum_area = sum(area)
save "merge_zip_year.dta", replace

/*EV ownership*/
import delimited "vehicle_ownership_zip.csv", clear
drop if zip < 90000
collapse (sum) numberofvehicles, by (datayear zip fueltype)
ren datayear year
ren numberofvehicles ownership

egen id_type = group(fueltype)
egen id = group(year zip)
reshape wide ownership fueltype, i(id) j(id_type)
ren ownership1 ownership_biodiesel
ren ownership2 ownership_diesel
ren ownership3 ownership_electric
ren ownership4 ownership_flexfuel
ren ownership5 ownership_gasoline
ren ownership6 ownership_hybrid
ren ownership7 ownership_hydrogen
ren ownership8 ownership_gas
ren ownership9 ownership_PHEV
ren ownership10 ownership_propane

merge m:1 zip using "zip_zcta_crosswalk.dta"
drop if _merge != 3
drop _merge
collapse (sum) ownership_biodiesel ownership_diesel ownership_electric ownership_flexfuel ownership_gasoline ownership_hybrid ownership_hydrogen ownership_gas ownership_PHEV ownership_propane, by(zcta year)

gen ownership = ownership_biodiesel + ownership_diesel + ownership_electric + ownership_flexfuel + ownership_gasoline + ownership_hybrid + ownership_hydrogen + ownership_gas + ownership_PHEV + ownership_propane
gen ownership_EV = ownership_electric + ownership_PHEV
gen ownership_nonEV = ownership - ownership_EV

tsset zcta year
foreach var in biodiesel diesel electric flexfuel gasoline hybrid hydrogen gas PHEV propane EV nonEV {
gen sales_`var' = ownership_`var' - L.ownership_`var'
}
gen sales = ownership - L.ownership

foreach var in biodiesel diesel electric flexfuel gasoline hybrid hydrogen gas PHEV propane EV nonEV {
gen share_`var' = sales_`var'/sales
}
save "vehicle_ownership_zip", replace

/*EV sales*/
import delimited "vehicle_sales_zip.csv", clear
drop if zip < 90000
collapse (sum) numberofvehicles, by (datayear zip fueltype)
ren datayear year
ren numberofvehicles sales

egen id_type = group(fueltype)
egen id = group(year zip)
reshape wide sales fueltype, i(id) j(id_type)
ren sales1 sales_electric0
ren sales2 sales_hydrogen0
ren sales3 sales_PHEV0

merge m:1 zip using "zip_zcta_crosswalk.dta"
drop if _merge != 3
drop _merge
collapse (sum) sales_electric0 sales_hydrogen0 sales_PHEV0, by(zcta year)
gen sales_EV0 = sales_electric0 + sales_PHEV0
save "vehicle_sales_zip", replace

/*GHI*/
forvalues y = 2011/2016 {
import delimited "CA_GHI_zip_`y'.txt", clear
gen year = `y'
save "CA_GHI_zip_`y'", replace
}
use "CA_GHI_zip_2011.dta", clear
forvalues y = 2012/2016 {
append using "CA_GHI_zip_`y'.dta"
}
keep zcta5ce10 mean year
ren zcta5ce10 zcta
ren mean ghi
save "CA_GHI_zip", replace

/*HOV lanes*/
forvalues y = 2011/2016 {
import delimited "HOV_length_`y'.txt", clear
ren z zcta
cap ren length hov_length
keep zcta hov_length
collapse (sum) hov_length, by(zcta)
gen year = `y'
save "zcta_HOV_length_`y'.dta", replace
}
use "zcta_HOV_length_2011.dta", clear
forvalues y = 2012/2016 {
append using "zcta_HOV_length_`y'.dta"
}
save "zcta_HOV_length.dta", replace

/*demo*/
forvalues y = 2011/2016 {
import delimited "demo_`y'.csv", clear
split name, p(" ")
ren name2 zcta
ren dp05_0001e pop
ren dp05_0002pe male
ren dp05_0017e age
ren dp05_0059pe white
ren dp05_0081e housing
destring zcta pop male age white housing, replace force
gen year = `y'
keep year zcta pop male age white housing
save "demo_`y'", replace
}
use "demo_2011", clear
forvalues y = 2012/2016 {
append using "demo_`y'"
}
save demo, replace

/*income*/
forvalues y = 2011/2016 {
import delimited "income_`y'.csv", clear
split name, p(" ")
ren name2 zcta
ren s1901_c01_001e household
ren s1901_c01_002e income_10k
ren s1901_c01_003e income_15k
ren s1901_c01_004e income_25k
ren s1901_c01_005e income_35k
ren s1901_c01_006e income_50k
ren s1901_c01_007e income_75k
ren s1901_c01_008e income_100k
ren s1901_c01_009e income_150k
ren s1901_c01_010e income_200k
ren s1901_c01_011e income_200kmore
ren s1901_c01_012e income_median
ren s1901_c01_013e income_mean
destring zcta household income_*, replace force
gen year = `y'
keep year zcta household income_*
save "income_`y'", replace
}
use "income_2011", clear
forvalues y = 2012/2016 {
append using "income_`y'"
}
save income, replace

/*edu*/
forvalues y = 2011/2016 {
import delimited "edu_`y'.csv", clear
split name, p(" ")
ren name2 zcta
ren s1501_c01_006e pop_25
ren s1501_c01_007e grade9
ren s1501_c01_008e grade12
ren s1501_c01_009e highschool
ren s1501_c01_010e somecollege
ren s1501_c01_011e accociate
ren s1501_c01_012e bachelor
ren s1501_c01_013e graduate
ren s1501_c01_014e highschool_more
ren s1501_c01_015e bachelor_more
destring zcta pop_25 grade9 grade12 highschool somecollege accociate bachelor graduate highschool_more bachelor_more, replace force
gen year = `y'
keep year zcta pop_25 grade9 grade12 highschool somecollege accociate bachelor graduate highschool_more bachelor_more
save "edu_`y'", replace
}
use "edu_2011", clear
forvalues y = 2012/2016 {
append using "edu_`y'"
}
save edu, replace

/*NSC rate*/
/*SDGE*/
import excel "Net Surplus Compensation Rate.xlsx", sheet("SDGE") firstrow clear
ren month month0
gen month = 1
replace month = 2 if month0 == "February"
replace month = 3 if month0 == "March"
replace month = 4 if month0 == "April"
replace month = 5 if month0 == "May"
replace month = 6 if month0 == "June"
replace month = 7 if month0 == "July"
replace month = 8 if month0 == "August"
replace month = 9 if month0 == "September"
replace month = 10 if month0 == "October"
replace month = 11 if month0 == "November"
replace month = 12 if month0 == "December"
gen ym=ym(year, month)
format ym %tm
gen utility = "SDGE"
keep NSC_rate year month ym utility
save "NSC_rate_SDGE.dta", replace

/*SCE*/
import excel "Net Surplus Compensation Rate.xlsx", sheet("SCE") firstrow clear
gen year = year(Time)
gen month = month(Time)
gen ym=ym(year, month)
format ym %tm
gen utility = "SCE"
keep NSC_rate year month ym utility
save "NSC_rate_SCE.dta", replace

/*PGE*/
import delimited "NP15.csv", clear
gen date = date(tradedate, "YMD")
format date %td
gen year = year(date)
gen month = month(date)
collapse highpricemwh lowpricemwh wtdavgpricemwh, by(year month)
gen ym = ym(year, month)
format ym %tm
tsset ym
foreach var in highpricemwh lowpricemwh wtdavgpricemwh {
gen `var'_12 = (`var' + L.`var' + L2.`var' + L3.`var' + L4.`var' + L5.`var' + L6.`var' + L7.`var' + L8.`var' + L9.`var' + L10.`var' + L11.`var')/12
}
keep if year>=2011 & year<=2016
collapse highpricemwh_12 lowpricemwh_12 wtdavgpricemwh_12, by(year)
ren lowpricemwh_12 NSC_rate
keep NSC_rate year
replace NSC_rate = NSC_rate/10
gen utility = "PGE"
save "NSC_rate_PGE.dta", replace

/*NSC rate merge*/
use NSC_rate_SDGE, clear
append using NSC_rate_SCE
replace NSC_rate = 100*NSC_rate
label var NSC_rate "NSC Rate (cent/kWh)"
label var year "Year"
label var month "Month"
label var ym "Time"
label var utility "Utility Company"

collapse NSC_rate, by(utility year)
keep if year>=2011 & year<=2016
append using NSC_rate_PGE
save NSC_rate_year, replace

/*city zcata crosswalk*/
import excel "city_zip.xlsx", sheet("Sheet1") firstrow clear

merge m:1 zip using "zip_zcta_crosswalk.dta"
drop if _merge != 3
drop _merge
collapse (firstnm) city, by(zcta)
save city_zcta, replace

/*gasoline price*/
foreach var in Bakersfield Chico Fresno Modesto Oakland Orange_County Riverside Sacramento Salinas San_Bernardino San_Diego San_Jose Santa_Barbara Stockton Ventura LA SF {
import delimited "price_gasoline_`var'.csv", clear
gen date = date(v1, "YMD")
gen year = year(date)
ren v2 price_gasoline_city
collapse price_gasoline_city, by(year)
gen city = "`var'"
save "price_gasoline_`var'", replace
}
use "price_gasoline_Bakersfield", clear
foreach var in Chico Fresno Modesto Oakland Orange_County Riverside Sacramento Salinas San_Bernardino San_Diego San_Jose Santa_Barbara Stockton Ventura LA SF {
append using "price_gasoline_`var'"
}
replace city = "Orange County" if city == "Orange_County"
replace city = "San Bernardino" if city == "San_Bernardino"
replace city = "San Diego" if city == "San_Diego"
replace city = "San Jose" if city == "San_Jose"
replace city = "Santa Barbara" if city == "Santa_Barbara"
egen id = group(city)
tsset id year
gen price_gasoline_city_1 = L.price_gasoline_city
keep city year price_gasoline_city price_gasoline_city_1
save "price_gasoline_city", replace

import delimited "price_gasoline_CA.csv", clear
gen date = date(v1, "YMD")
gen year = year(date)
ren v2 price_gasoline_CA
collapse price_gasoline_CA, by(year)
tsset year
gen price_gasoline_CA_1 = L.price_gasoline_CA
save "price_gasoline_CA", replace

/*CPI*/
import excel "CA_CPI.xlsx", sheet("Sheet1") firstrow clear
label var adjusted_CPI "Adjusted CPI"
label var CPI_worker "CPI Worker"
label var adjusted_CPI_worker "Adjusted CPI Worker"
save CA_CPI, replace

/*EV station*/
import delimited "EV_station.csv", clear
gen date = date(opendate, "YMD")
format date %td
gen year = year(date)
gen month = month(date)
gen ym = ym(year, month)
format ym %tm
drop if year == .

collapse (count) id, by (ym zip)
destring zip, replace force
ren id new_station

merge m:1 zip using "zip_zcta_crosswalk.dta"
drop if _merge != 3
drop _merge

collapse (sum) new_station, by (ym zcta)
tsset zcta ym
tsfill, full
replace new_station = 0 if new_station == .

gen date = dofm(ym)
format date %td
gen year = year(date)
collapse (sum) new_station, by (year zcta)

tsset zcta year
sort zcta year
by zcta: gen station = sum(new_station)
gen station_1 = L.station

save EV_station, replace

/*final merge*/
use "CA_GHI_zip.dta", clear
drop if zcta < 90000

merge 1:1 zcta year using "CA_DNI_zip.dta"
drop _merge

merge 1:1 zcta year using "merge_zip_year.dta"
drop if _merge == 2
drop _merge

foreach var in count_new area_new count_end area_end count_add area_add count area cum_count cum_area {
replace `var' = 0 if `var'== .
}

merge 1:1 zcta year using "vehicle_ownership_zip.dta"
drop if _merge != 3
drop _merge

merge 1:1 zcta year using "vehicle_sales_zip.dta"
drop if _merge == 2
drop _merge

foreach var in electric hydrogen PHEV EV {
replace sales_`var'0 = 0 if sales_`var'0== .
}
merge m:1 zcta using "zcta_county_crosswalk.dta"
drop if _merge != 3
drop _merge

merge 1:1 zcta year using "zcta_HOV_length.dta"
drop if _merge == 2
drop _merge
replace hov_length = 0 if hov_length == .

merge 1:1 zcta year using "demo"
drop if _merge != 3
drop _merge

merge 1:1 zcta year using "income"
drop if _merge != 3
drop _merge

merge 1:1 zcta year using "edu"
drop if _merge != 3
drop _merge

merge m:1 zcta using "zcta_utility_crosswalk.dta"
drop if _merge == 2
drop _merge

merge m:1 utility year using NSC_rate_year
drop if _merge == 2
drop _merge

merge m:1 year using CA_CPI
drop if _merge != 3
drop _merge

gen NSC_rate_CPI = NSC_rate/adjusted_CPI

merge m:1 year using price_gasoline_CA
drop if _merge != 3
drop _merge

merge m:1 zcta using city_zcta
drop if _merge == 2
drop _merge

merge m:1 city year using price_gasoline_city
drop if _merge == 2
drop _merge

gen price_gasoline = price_gasoline_CA
replace price_gasoline = price_gasoline_city if price_gasoline_city != .

gen price_gasoline_1 = price_gasoline_CA_1
replace price_gasoline_1 = price_gasoline_city_1 if price_gasoline_city != .

merge 1:1 zcta year using "EV_station.dta"
drop if _merge == 2
drop _merge
replace station = 0 if station == .
replace station_1 = 0 if station_1 == .

replace highschool_more = (highschool + somecollege + accociate + bachelor + graduate)/pop_25 if highschool_more == .
replace bachelor_more = (bachelor + graduate)/pop_25 if bachelor_more == .

ren city city0
merge m:1 zcta using "zcta_city_crosswalk.dta"
drop if _merge != 3
drop _merge

/*drop extreme values*/
drop if cum_count>1080
drop if sales>1200

save "merge_zip_year_final.dta", replace
